package org.databandtech.flink.flatmapfunc;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

/**
 * Splits each incoming line into tokens using a regular expression and emits
 * one {@code (token, 1)} pair per non-empty token.
 */
public class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {

	private static final long serialVersionUID = 8651165533356257123L;

	/** Delimiter regex used when the caller supplies none. */
	private static final String DEFAULT_SPLIT_REGEX = " ";

	/** Regular expression handed to {@link String#split(String)} for every record. */
	private final String splitRegex;

	/**
	 * Creates a tokenizer that splits on the given regular expression.
	 *
	 * @param splitregex regular expression used to split each input line; when
	 *                   {@code null} or empty, a single space is used instead
	 */
	public Tokenizer(String splitregex) {
		super();
		// The supplied regex must always be stored; only a missing/empty one
		// falls back to the default. Leaving the field unset would make
		// flatMap fail with a NullPointerException on the first record.
		if (splitregex == null || splitregex.isEmpty()) {
			this.splitRegex = DEFAULT_SPLIT_REGEX;
		} else {
			this.splitRegex = splitregex;
		}
	}

	/**
	 * Splits {@code value} on the configured regular expression and collects a
	 * {@code (token, 1)} tuple for each resulting token that is not empty.
	 *
	 * @param value the input line to tokenize; must not be {@code null}
	 * @param out   collector receiving the emitted tuples
	 */
	@Override
	public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
		String[] tokens = value.split(splitRegex);
		// emit the pairs, skipping empty tokens produced by adjacent or leading delimiters
		for (String token : tokens) {
			if (!token.isEmpty()) {
				out.collect(new Tuple2<>(token, 1));
			}
		}
	}

}
